Prepare the data

This data is STDIF structure - a class for unstructured spatio-temporal data, with n spatial locations and times, where n observations are available, from the spacetime package.

The 0s actually reflect that there were no observations at that time - in other words, they are not false absences, and should not be removed.

## class       : SpatialPoints 
## features    : 1 
## extent      : 1189877, 1189877, 459784, 459784  (xmin, xmax, ymin, ymax)
## crs         : +proj=lcc +lat_0=44 +lon_0=-70 +lat_1=50 +lat_2=46 +x_0=800000 +y_0=0 +datum=NAD83 +units=m +no_defs
##      timeIndex
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         6
## [1] "1971-09-07 08:25:00 ADT" "1971-09-07 11:15:00 ADT"
## [3] "1971-09-07 12:45:00 ADT" "1971-09-07 15:05:00 ADT"
## [5] "1971-09-08 08:25:00 ADT" "1971-09-08 10:25:00 ADT"

Data exploration

White hake abundances

Through time

plot(number@endTime, number@data$`WHITE HAKE`, type = "l")

In space

plot(number@sp, pch = 19, cex = number@data$`WHITE HAKE`/300)
plot(gulf, add = TRUE)

Explanatory variables

# function to add coordinates to dataset in preparation for conversion to sf
add_coords <- function(stidf_object){
  # assign datasets to objects to simplify the code a bit
  df <- stidf_object@data 
  coords <- stidf_object@sp@coords

  # add coordinate columns to dataset
  df <- mutate(df,
           longitude = coords[,1],
           latitude = coords[,2],
           time = lubridate::date(stidf_object@endTime),
           year = lubridate::year(stidf_object@endTime)
           )
  return(df)
}

# convert datasets to sf objects

explan_sf <- add_coords(explan) %>%
  st_as_sf(x = ., coords = c("longitude", "latitude"))
st_crs(explan_sf) <- explan@sp@proj4string # set coordinate ref system

gulf_sf <- st_as_sf(gulf)
st_crs(gulf_sf) <- gulf@proj4string # set coordinate ref system

# maps

# ggplot map of explanatory variable
p1 <- ggplot() +
  geom_sf(data = gulf_sf, fill = "white") +
  geom_sf(data = filter(explan_sf, 
                        # pick a sequence of years to check out
                        year %in% c(1971, 1981, 1992, 2001, 2011, 2019)),
          aes(col = bottom.temperature)) +
  facet_wrap(~year) +
  scale_color_viridis_c(option = "viridis") +
  labs(color = "Bottom \ntemperature") +
  theme_void() 

p2 <- ggplot() +
  geom_sf(data = gulf_sf, fill = "white") +
  geom_sf(data = filter(explan_sf, 
                        # pick a sequence of years to check out
                        year %in% c(1971, 1981, 1992, 2001, 2011, 2019)),
          aes(col = bottom.salinity)) +
  facet_wrap(~year) +
  scale_color_viridis_c(option = "viridis") +
  labs(color = "Bottom \nsalinity") +
  theme_void()

p3 <- ggplot() +
  geom_sf(data = gulf_sf) +
  geom_sf(data = filter(explan_sf, 
                        # pick a sequence of years to check out
                        year %in% c(1971, 1981, 1992, 2001, 2011, 2019)),
          aes(col = depth)) +
  facet_wrap(~year) +
  scale_color_viridis_c(option = "viridis") +
  labs(color = "Depth") +
  theme_void() 

# patchwork together!
p1 / p2 / p3

Step 1 - Building the meshes

This mesh size reflects the range of coordinates in space. Changing to a smaller value, like 35000, gives a more appropriate mesh.

maxEdge <- 25000
meshSpace <- inla.mesh.2d(boundary = gulf,
                          max.edge = maxEdge*c(0.5,2),
                          offset = c(10000,20000),
                          cutoff = maxEdge/2)

# plot points under the mesh, to see if it makes sense
par(mar = c(1,1,1,1))
plot(number@sp, pch = 19, cex = 0.2, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)

# plot abundances under the mesh too
plot(number@sp, pch = 19, cex = number@data$`WHITE HAKE`/300, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)

# Number of edges in the mesh
meshSpace$n
## [1] 470
### Time
time <- as.numeric(number@endTime)/60 # Number of minutes since Jan 1, 1970 

timeBasis <- time - 884844 # Time starting at 1

# Division points
 timeGr <- 11
 timeSeq <- seq(min(timeBasis),
                max(timeBasis),
                length = timeGr)
meshTime <- inla.mesh.1d(timeSeq)
# ^ This divides time into 11 bins of minutes since 1970.
#timeGr <- 2

# try with two extreme time points, just to simplify the model run
#meshTime <- inla.mesh.1d(c(min(timeBasis), max(timeBasis)))

How many estimations will be carried out?

meshSpace$n * meshTime$n
## [1] 5170

Step 2 - Define the stochastic partial differential equation

## [1] 34.64238

Step 3 - Priors and hyperparameters for temporal autocorrelation

# Temporal autocorrelation prior
hSpec <- list(theta=list(prior='pccor1', param=c(0.2, 0.9))) 
# assuming not a lot of temporal autocorrelation (0.2) and we are fairly sure of that (0.9)

# Precision likelihood 
## (negative binomial or any distribution that has more than one parameter)
# precPrior <- list(prior='pc.prec', param=c(1, 0.01))
# guessing it's 1 with a confidence of .01, which is super low.

# note ^ is not useful if using a Poisson.

Step 4 - Index matrix

Field <- inla.spde.make.index("field", 
                              n.spde = SPDE$n.spde,
                              n.group = meshTime$n)

Step 5 - A matrix

# For estimation
A <- inla.spde.make.A(meshSpace,
                      loc = coordinates(number@sp),
                      group = timeBasis,
                      group.mesh = meshTime)

Step 6 - Organise the A matrix into a list

Alist <- as.list(rep(1,4)) # list with single values
Alist[[1]] <- A

Step 7 - Organise the effects (spatial autocorrelation structure and explanatory variables)

Here, we’re using two variables about the bottom conditions, because White hake is a groundfish.

effect <- list(Field,
               bTemp =  explan@data$bottom.temperature,
               bSal = explan@data$bottom.salinity,
               bDep = explan@data$depth)

Step 8 - Build stack

Stack <- inla.stack(data=list(whitehake = number@data$`WHITE HAKE`),
                    A = Alist,
                    effects = effect,
                    tag="basis")

Step 9 - Building the model (Finally!)

Model selection

# build candidate models --- only run with two time points. otherwise, eternal
form_0 <- whitehake ~ 0 +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_1 <- whitehake ~ 0 + bTemp +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_2 <- whitehake ~ 0 + bSal +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_3 <- whitehake ~ 0 + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_4 <- whitehake ~ 0 + bTemp + bSal +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_5 <- whitehake ~ 0 + bTemp + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_6 <- whitehake ~ 0 + bSal + bDep +
  f(field, model=SPDE, group = field.group, 
    control.group=list(model='ar1', hyper=hSpec)) 
form_7 <- whitehake ~ 0 + bTemp + bSal + bDep +
  f(field, model=SPDE, group = field.group, # how they're grouped
    control.group=list(model='ar1', hyper=hSpec)) # build with model parameters
forms_ls <- list(form_0, form_1, form_2, form_3, form_4, form_5, form_6, form_7)

# evaluate each model
model_ls <- vector("list", length = length(forms_ls))
for(i in 1:length(model_ls)){
  model_ls[[i]] <- inla(forms_ls[[i]],
                        data = inla.stack.data(Stack),
                        family="gaussian",
                        control.family = list(link="identity"),
                        control.predictor = list(A = inla.stack.A(Stack), 
                                                 compute = TRUE, 
                                                 link = 1),
                        control.compute = list(waic = TRUE, config = TRUE))
}

# extract waic to compare
waic_df <- data.frame(model = 1:7, waic = NA)
for(i in 1:nrow(waic_df)){
  waic_df[i,2] <- model_ls[[i]]$waic$waic
}
# find lowest waic (it's model #4)
waic_df[which(waic_df$waic == min(waic_df$waic)),]

#saveRDS(model_gaussian, "outputs/whitehake_model.RDS")

Build chosen model

form <- whitehake ~ 0 + bTemp + bSal +
        f(field, # temporal autocorr
          model=SPDE, # spatial autocorr structure
          group = field.group, # how they're grouped
          control.group=list(model='ar1', hyper=hSpec) # build with model parameters
          )

model_gaussian <- inla(form,
              data = inla.stack.data(Stack),
              family="gaussian",
              control.family = list(link="identity"),
              control.predictor = list(A = inla.stack.A(Stack), 
                                       compute = TRUE, 
                                       link = 1),
              control.compute = list(waic = TRUE,
                                     config = TRUE))

Checking and interpreting the model

summary(model_gaussian)
## 
## Call:
##    c("inla(formula = form, family = \"gaussian\", data = 
##    inla.stack.data(Stack), ", " control.compute = list(waic = TRUE, config 
##    = TRUE), control.predictor = list(A = inla.stack.A(Stack), ", " compute 
##    = TRUE, link = 1), control.family = list(link = \"identity\"))" ) 
## Time used:
##     Pre = 5.64, Running = 312, Post = 3.38, Total = 321 
## Fixed effects:
##         mean    sd 0.025quant 0.5quant 0.975quant   mode kld
## bTemp  1.215 0.158      0.905    1.215      1.524  1.215   0
## bSal  -0.054 0.027     -0.107   -0.054     -0.001 -0.054   0
## 
## Random effects:
##   Name     Model
##     field SPDE2 model
## 
## Model hyperparameters:
##                                             mean      sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.00e-03   0.000   1.00e-03 1.00e-03
## Range for field                         1.13e+04 191.155   1.09e+04 1.13e+04
## Stdev for field                         3.25e+01   0.480   3.16e+01 3.25e+01
## GroupRho for field                      7.47e-01   0.006   7.34e-01 7.48e-01
##                                         0.975quant     mode
## Precision for the Gaussian observations   1.00e-03 1.00e-03
## Range for field                           1.17e+04 1.13e+04
## Stdev for field                           3.35e+01 3.24e+01
## GroupRho for field                        7.57e-01 7.50e-01
## 
## Expected number of effective parameters(stdev): 692.70(13.71)
## Number of equivalent replicates : 9.51 
## 
## Watanabe-Akaike information criterion (WAIC) ...: 63501.23
## Effective number of parameters .................: 713.59
## 
## Marginal log-Likelihood:  -32011.66 
## Posterior marginals for the linear predictor and
##  the fitted values are computed
Efxplot(list(model_gaussian)) + theme_classic()
## Loading required package: MCMCglmm
## Loading required package: coda
## Loading required package: ape
## 
## Attaching package: 'ape'
## The following objects are masked from 'package:raster':
## 
##     rotate, zoom

# from Coding Club tutorial: "NB: There are no P values in INLA. Importance or significance of variables can be deduced by examining the overlap of their 2.5% and 97.5% posterior estimates with zero."
ExpY <- model_gaussian$summary.fitted.values[1:nrow(number@data),"mean"]

#For the variance we also need the posterior mean value of θ, which is obtained via
phi1 <- model_gaussian$summary.hyper[1, "mean"]

#The variance is then given by
VarY <- ExpY * (1 - ExpY) / (1 + phi1)

# And once we have the mean and the variance we can calculate Pearson residuals.
E1 <- (number@data$`WHITE HAKE` - ExpY) / sqrt(VarY)
## Warning in sqrt(VarY): NaNs produced
# Apply model validation
# Figure 23.11
par(mfrow = c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
plot(x = model_gaussian$summary.fitted.values$mean[1:nrow(number@data)], 
     y = E1,
     xlab = "Fitted values",
     ylab = "Pearson residuals",
     xlim = c(0, 1))
abline(h = 0, lty = 2)

# note: there's a value at 130...

Fixed effects

This is the output you would get from a normal linear model. - mode: mode of the parameters distribution - kld: ????

Random effects

Field group is the random effect we set earlier.

DF

Distribution for the parameter picked with mean, sd, and credible interval

Predictions through space and time

# Dimension of the raster
stepsize <- 1000 # choose cell size: 1000m x 1000m
rangeX <- range(meshSpace$loc[,1])
rangeY <- range(meshSpace$loc[,2])
nxy <- round(c(diff(rangeX), 
               diff(rangeY)) / stepsize)

# Define basis of the map
mapBasis <- inla.mesh.projector(meshSpace,
                               xlim = rangeX,
                               ylim = rangeY,
                               crs = crs(gulf))


# Calculate prediction
mapMean <- vector("list", length = timeGr)
map.025 <- vector("list", length = timeGr)
map.975 <- vector("list", length = timeGr)

# for every time point
for(i in 1:timeGr){
  # Model prediction
  fitMean <- inla.mesh.project(mapBasis, 
                          model_gaussian$summary.random$field$mean[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
  fit.025 <- inla.mesh.project(mapBasis, 
                               model_gaussian$summary.random$field$`0.025quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
  fit.975 <- inla.mesh.project(mapBasis, 
                               model_gaussian$summary.random$field$`0.975quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])

  # Build maps with confidence intervals
  mapMean[[i]] <- raster(t(fitMean[,ncol(fitMean):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))

  map.025[[i]] <- raster(t(fit.025[,ncol(fit.025):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))
  
  map.975[[i]] <- raster(t(fit.975[,ncol(fit.975):1]),
                         xmn = min(mapBasis$x), xmx = max(mapBasis$x), 
                         ymn = min(mapBasis$y), ymx = max(mapBasis$y),
                         crs = crs(gulf))
}  

# Convert to a rasterStack (aligns the elements of a list)
rasterMean <- stack(mapMean)
raster.025 <- stack(map.025)
raster.975 <- stack(map.975)
  # to check this visually: 
  # image(mapMean[[1]])
  # plot(meshSpace, add = TRUE)

# reorganise
for(i in 1:5){
  values(rasterMean)[,i] <- as.vector(t(mapMean[[i]]))
  values(raster.025)[,i] <- as.vector(t(map.025[[i]]))
  values(raster.975)[,i] <- as.vector(t(map.975[[i]]))
}

# Mask the region to keep only the region of interest
rasterMeanMask <- mask(rasterMean, gulf)
raster.025Mask <- mask(raster.025, gulf)
raster.975Mask <- mask(raster.975, gulf)
# image(rasterMeanMask)
# plot(gulf, add = TRUE)
# Map the results
par(mfrow = c(2,3), mar = c(1,1,3,1))

for(i in 1:timeGr){
zlimRange <- range(values(raster.025Mask[[i]]),
                   values(rasterMeanMask[[i]]),
                   values(raster.975Mask[[i]]), na.rm = TRUE)
}

# convert to data frames for ggplot
raster.025Mask_df <- as.data.frame(raster.025Mask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Lower (0.025)")

rasterMeanMask_df <- as.data.frame(rasterMeanMask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Mean")

raster.975Mask_df <- as.data.frame(raster.975Mask, xy = TRUE, na.rm = TRUE) %>%
  pivot_longer(cols = 3:ncol(.)) %>%
  mutate(quant = "Higher (0.975)")

# bind together
raster_df <- rbind(raster.025Mask_df, rasterMeanMask_df, raster.975Mask_df)
raster_df$quant <- factor(raster_df$quant, levels = c("Lower (0.025)", "Mean", "Higher (0.975)"))

# make this more automatized later (label with years) 
raster_df$name <- factor(raster_df$name, levels = c("layer.1",
                                                    "layer.2",
                                                    "layer.3",
                                                    "layer.4",
                                                    "layer.5",
                                                    "layer.6",
                                                    "layer.7",
                                                    "layer.8", 
                                                    "layer.9", 
                                                    "layer.10", 
                                                    "layer.11"))

p <- ggplot() +
    geom_raster(data = filter(raster_df, quant == "Mean"), 
                aes(x = x, y = y, fill = value)) + 
    labs(fill = "Abundance") +
    scale_fill_distiller(palette = "RdBu", 
                         limits = c(-max(abs(raster_df$value)), 
                                    max(abs(raster_df$value)))) +
    #facet_wrap(~quant) +
    theme_void() +
  gganimate::transition_states(name, transition_length=.6, ) +
  labs(title = 'Time group: {closest_state}')
p

# save!
gganimate::anim_save("predictions.gif", animation = p, path = "figures/")

Each time step:

p2 <- ggplot() +
    geom_raster(data = raster_df, 
                aes(x = x, y = y, fill = value)) + 
    labs(fill = "Abundance") +
    scale_fill_distiller(palette = "RdBu", 
                         limits = c(-max(abs(raster_df$value)), 
                                    max(abs(raster_df$value)))) +
    facet_grid(name~quant) +
    theme_void() 
p2